package com.zhao.wc;

import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.util.Collector;


// Batch processing (DataSet API), intended for offline data
/**
 * Batch word count built on Flink's DataSet API.
 *
 * <p>Reads a text file line by line, splits every line on single spaces, and
 * prints how many times each word occurs.
 */
public class WordCount {

    /** Input file used when no path is supplied on the command line. */
    private static final String DEFAULT_INPUT_PATH =
            "E:\\flinkTest\\src\\main\\resources\\text.txt";

    /**
     * Runs the word-count job.
     *
     * @param args optional; when present, {@code args[0]} is the path of the text
     *             file to read, otherwise {@link #DEFAULT_INPUT_PATH} is used
     * @throws Exception if reading the input or running the job fails
     */
    public static void main(String[] args) throws Exception {
        // Create the execution environment.
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        // Read the input file; the path may be overridden by the first argument.
        String inputPath = (args != null && args.length > 0) ? args[0] : DEFAULT_INPUT_PATH;
        DataSet<String> inputDataSet = env.readTextFile(inputPath);

        // Turn every line into (word, 1) pairs, then aggregate per word.
        DataSet<Tuple2<String, Integer>> resultSet = inputDataSet.flatMap(new MyflatMapper())
                // Group by the word (tuple field 0).
                .groupBy(0)
                // Sum the counts (tuple field 1).
                .sum(1);

        // Per the Flink DataSet documentation, print() also triggers job execution,
        // so no explicit env.execute() call follows.
        resultSet.print();
    }

    /**
     * Splits a line into words and emits one {@code (word, 1)} tuple per word.
     */
    public static class MyflatMapper implements FlatMapFunction<String, Tuple2<String, Integer>> {

        /**
         * Tokenizes {@code value} on single spaces and collects a {@code (word, 1)}
         * tuple for every non-empty token.
         *
         * @param value one line of input text
         * @param out   collector receiving the emitted tuples
         * @throws Exception declared by the {@link FlatMapFunction} contract
         */
        @Override
        public void flatMap(String value, Collector<Tuple2<String, Integer>> out) throws Exception {
            // Split on spaces.
            String[] words = value.split(" ");
            // Wrap every word in a 2-tuple with an initial count of 1.
            for (String word : words) {
                // Blank lines, leading spaces and consecutive spaces produce empty
                // tokens; skip them so "" is not counted as a word.
                if (word.isEmpty()) {
                    continue;
                }
                out.collect(new Tuple2<>(word, 1));
            }
        }
    }
}
